
clear all
set more off


//  Exploit  EVS and WVS data (1981-2020)
//  Sources for EVS and WVS data:
//  https://search.gesis.org/research_data/ZA4804
//  https://dbk.gesis.org/dbksearch/SDesc2.asp?ll=10&notabs=1&af=&nf=&search=&search2=&db=E&no=4804
//  https://www.worldvaluessurvey.org/WVSDocumentationWVL.jsp



* Load the EVS file and stack the WVS file underneath it.
* "clear" is the documented option of -use- for discarding the data in memory.
use "$data/EVS.dta", clear
append using "$data/WVS.dta"

* Recode the variable indicating the survey, either WVS or EVS:
* S001 == 1 is labelled EVS, rows where S001 is missing are labelled WVS.
recode S001 (1=1 "EVS") (missing=0 "WVS"), generate(survey)
drop S001
ren survey S001
order S001

* Recode the last wave 2017-2020, which was not properly encoded
* (code -4 is set to missing, the remaining codes only receive value labels).
recode S002 (-4=.) (1=1 "1981-1984") (2=2 "1989-1993") (3=3 "1994-1998") (4=4 "1999-2004") (5=5 "2005-2009") (6=6 "2010-2014") (7=7 "2017-2020"), generate(tag0)
drop S002
ren tag0 S002
order S002, after(S001)

* Combined wave indicator: fall back on the WVS wave where s002vs is missing.
replace s002vs = S002 if missing(s002vs)
recode s002vs (1=1 "1981-1984") (2=2 "1989-1993") (3=3 "1994-1998") (4=4 "1999-2004") (5=5 "2005-2010") (6=6 "2010-2014") (7=7 "2017-2020"), generate(tag1)
drop s002vs
ren tag1 s002vs
order s002vs, after(S002EVS)

label var S001 "survey adopted"
label var S002 "wave for WVS data only"
label var S002EVS "wave for EVS data only"
label var s002vs "wave for WVS and EVS data combined"
label var S003 "country code"
drop za_nr version doi S029 S030 S031 S032 S033 S034 S035

* Drop duplicates (578 obs):
drop if S036==1
drop S036

* Size of town.
* 1) X049 uses 8 size classes; they can only be aggregated into the 5 classes
*    of x049a: "2000 and less" and "2000-5000" collapse to under 5k,
*    "5000-10000" and "10000-20000" to 5-20k, "20-50k" and "50-100k" to 20-100k.
*    The aggregation is done BEFORE the 5-class variables are copied in;
*    otherwise their values, which are already on the 5-class scale, would be
*    pushed through the 8-to-5 mapping a second time.
recode X049 (1 2 = 1 "under 5k") (3 4 = 2 "5-20k") (5 6 = 3 "20-100k") (7 = 4 "100-500k") (8 = 5 "over 500k") (-5 = -5 "Missing") (-4 = -4 "Not asked") (-2 = -2 "No answer") (-1 = -1 "Don't know"), gen(tag2)
drop X049
ren tag2 x049

* 2) Variable x049a contains only valid data for EVS 2008; cases for EVS 2008
*    are coded as missing (-5) in X049.
replace x049 = x049a if x049 == -5 & S002EVS == 4

* 3) Same as point 2) but with X049A for WVS observations with missing values.
* NOTE(review): assumes X049A is on the same 5-class scale as x049a - confirm
* against the WVS codebook.
replace x049 = X049A if missing(x049)

replace x049 = . if x049 == -5
drop x049a X049A

* Lower-case every variable name and store the integrated file.
ren *, lower
save "$data/integrated_value_surveys_1981_2020.dta", replace

*  Create geographic areas
* List of distinct country codes present in the survey data. The full dataset
* was saved to disk just before and is merged back in below, so the data in
* memory can simply be reduced here.
keep s003
duplicates drop
tempfile temp
save `temp'

* Country lookup table: keep only the countries that appear in the survey data.
import excel "$data/Countries.xlsx", sheet("countries") firstrow clear
ren vs_ s003
destring s003, replace
merge m:1 s003 using `temp'
keep if _merge == 3
drop _merge
save "$data/countriesid.dta", replace

* Attach the country information to every survey observation.
merge 1:m s003 using "$data/integrated_value_surveys_1981_2020.dta"
drop _merge

* Set negative codes to missing in every numeric variable except the
* categorical ones listed below. Restricting the list to numeric variables
* keeps the loop from aborting on a string variable.
quietly ds, has(type numeric)
local varbles `r(varlist)'
local categorical "macroarea countryname s009 s009a x048a_n1 x048b_n2 x048c_n3 x048e_n1 x048f_n2 x048g_n3 tradagg survsagg"
local vars: list varbles - categorical
di "`vars'"
foreach i of local vars {
	replace `i' = . if `i' < 0
}

* Fill s009 and s009a within country from neighbouring observations.
* -replace- works through the observations in order, so a fill from [_n-1]
* cascades through a run of missing values. The second pass runs over the
* reversed order so that the fill from the following observation cascades too.
tempvar fwd bwd
gen long `fwd' = _n
gen long `bwd' = -_n
foreach c in s009 s009a {
	bysort countryname (`fwd'): replace `c' = `c'[_n-1] if missing(`c') & !missing(`c'[_n-1])
	bysort countryname (`bwd'): replace `c' = `c'[_n-1] if missing(`c') & !missing(`c'[_n-1])
}
drop `fwd' `bwd'

sort countryname s002vs s001
save "$data/integrated_value_surveys_1981_2020.dta", replace


*-----------------------------------------------------------------------------*
* 				STEP ONE: AVERAGES COUNTRY-WAVE LEVEL
* 1. Construct averages by country*wave for each question related to family/trust/gender roles
*-----------------------------------------------------------------------------*

use "$data/integrated_value_surveys_1981_2020.dta", clear

*  Select values related to family/trust/gender roles
keep macroarea countryname s001 s002 s002evs s002vs a001 a025 a026 a027 a030 ///
	a035 a038 a041 a044 a045 c001 c001_01 c002 c003 a165 d001 ///
	d001_b d059 d019 d022 d025 d023 ///
	d028 d034 d040 d041 d043 d043_01 d044 d044a ///
	d045 d047 d048 d049 d051 c027_4 d057 d058 d060 d061 ///
	d054 x026 g006 a042 a062 e023 a173

* Map the 5-point d001 onto 4 points (2 and 3 share a code) and use it to fill
* d001_b where that is missing. The helper D001 is dropped right away, so it
* carries no value labels.
recode d001 (1=1) (2 3=2) (4=3) (5=4), gen(D001)
replace d001_b = D001 if missing(d001_b) & !missing(D001)
drop d001 D001
ren d001_b d001

* Fill c001 from c001_01 where c001 is missing.
replace c001 = c001_01 if missing(c001) & !missing(c001_01)
drop c001_01

* Drop variables with too many missing values (i.e., >=90%).
* Built-in -count- is used so that no user-written command is required.
foreach i of varlist _all {
	quietly count if missing(`i')
	if 100 * r(N) / _N >= 90 drop `i'
}

* Collapse the dataset at country-wave level
decode s002vs, g(wave)
gen aux = countryname + "/" + wave

* Identifiers define the cells; every other variable (except the string helper
* wave) is averaged. Listing the variables explicitly avoids depending on
* their position in the dataset.
local keys aux countryname macroarea s001 s002vs s002evs s002
quietly ds `keys' wave, not
local questions `r(varlist)'

* -collapse- discards variable labels: remember them (falling back on the
* variable name when a label is empty) and restore them afterwards.
foreach v of varlist _all {
	local l`v' : variable label `v'
	if `"`l`v''"' == "" {
		local l`v' "`v'"
	}
}

collapse (mean) `questions', by(`keys')

foreach v of varlist _all {
	label var `v' `"`l`v''"'
}

drop aux
save "$data/simpleaverages_selectedquestions.dta", replace


*-----------------------------------------------------------------------------*
* 				STEP TWO: PAIRING COUNTRIES
* 2. Construct a dataset with pair-wise country comparisons for each of these questions and wave and computing a distance equal to the difference between each country average for that given question in a given wave
*-----------------------------------------------------------------------------*

use "$data/simpleaverages_selectedquestions.dta", clear

* Question variables = every variable that is not an identifier.
quietly describe, varlist
local varbles `r(varlist)'
local except "macroarea countryname wave aux s001 s002 s002evs s002vs"
local vars: list varbles - except

tempfile tempgeneral partners
save `tempgeneral', replace

* Partner side of each pair: the same country-wave averages with the country
* identifiers and every question suffixed _2. Rows without a wave are left
* out so that they never act as a partner.
keep if !missing(s002vs)
keep s002vs s001 countryname macroarea `vars'
ren countryname countryname2
ren macroarea macroarea2
foreach v of local vars {
	ren `v' `v'_2
}
save `partners', replace

* Reference side of each pair: identifiers and questions suffixed _1.
use `tempgeneral', clear
ren countryname countryname1
ren macroarea macroarea1
foreach v of local vars {
	ren `v' `v'_1
}

* Form all pairs of countries observed in the same wave by the same survey.
* The pairing does not depend on the question, so a single join serves all
* questions. Rows that find no partner are kept and, like partners whose
* identifier is empty, carry "." as placeholder.
joinby s002vs s001 using `partners', unmatched(master)
drop _merge
replace countryname2 = "." if missing(countryname2)
replace macroarea2 = "." if missing(macroarea2)

* A country is not compared with itself.
drop if countryname1 == countryname2

* Distance = country 1 average minus country 2 average, per question.
foreach v of local vars {
	gen dist_`v' = `v'_1 - `v'_2
}

order _all, alphabetic
order macroarea* countryname* s001 s002vs s002 s002evs
sort countryname1 s002vs countryname2
order dist*, last

* Remove duplicates (interviewed by both EVS and WVS during the same wave):
* average over the rows of the same country pair and wave.

* -collapse- discards variable labels: remember them (falling back on the
* variable name when a label is empty) and restore them afterwards.
foreach v of varlist _all {
	local l`v' : variable label `v'
	if `"`l`v''"' == "" {
		local l`v' "`v'"
	}
}

* Everything except the pair identifiers and the survey indicator is averaged;
* s001 is left out, so it is not carried over.
local pairkeys macroarea1 macroarea2 countryname1 countryname2 s002vs
quietly ds `pairkeys' s001, not
collapse (mean) `r(varlist)', by(`pairkeys')

foreach v of varlist _all {
	label var `v' `"`l`v''"'
}

* Drop distance variables observed in fewer than two waves
foreach d of varlist dist_* {
	quietly levelsof s002vs if !missing(`d')
	if r(r) < 2 drop `d'
}

* Drop variables with few waves (and none after enlargement).
* Checked one by one because the loop above may already have removed them.
foreach d in dist_d043 dist_d028 dist_d034 {
	capture confirm variable `d', exact
	if !_rc drop `d'
}

* Give every distance variable the label of the question it is built from.
* The question name is whatever follows the "dist_" prefix, so names that
* themselves contain an underscore are matched as well.
foreach d of varlist dist_* {
	local stem = substr("`d'", 6, .)
	capture confirm variable `stem'_1, exact
	if !_rc {
		local qlabel : variable label `stem'_1
		label var `d' `"`qlabel'"'
	}
}

* Keep one row per unordered country pair and wave: tag is built from the
* alphabetically smaller name, the larger name and the wave label, and within
* a tag the row with the smaller countryname1 is kept.
gen aux1 = cond(countryname1 < countryname2, countryname1, countryname2)
gen aux2 = cond(countryname2 < countryname1, countryname1, countryname2)
decode s002vs, g(wave)
gen tag = aux1 + aux2 + wave
order tag, after(countryname2)
drop aux1 aux2
bysort tag (countryname1 s002vs): gen byte first = (_n == 1)
order first, after(tag)
keep if first == 1

save "$data/pairwiseaverage_selectedquestions.dta", replace












